# =============================================================================
# APPENDIX G: SVOLIK (2012) REPLICATION ANALYSIS
# Reproduces Svolik's survival analysis using Cox proportional hazards models
# =============================================================================

# --- PACKAGES ----------------------------------------------------------------
library(dplyr)
library(haven)
library(survival)
library(msm)
library(stargazer)

# --- DATA LOADING ------------------------------------------------------------
# Load Svolik's original data
data <- read_dta("leaders, institutions, covariates, updated tvc.dta") %>%
  filter(!is.na(legislature))

# Load latent estimates 
latent <- read.csv("estimates_independent.csv", stringsAsFactors = FALSE)

# --- DATA PREPARATION --------------------------------------------------------
# Set t0 for each row
data <- data %>%
  group_by(leadid) %>%
  mutate(t0 = dplyr::lag(t, default = 0)) %>%
  ungroup()

# Ensure common merge keys and merge datasets
data <- data %>% mutate(COWcode = ccode)

df_new <- dplyr::inner_join(latent, data, by = c("COWcode", "year")) %>%
  arrange(COWcode, year) %>%
  group_by(COWcode) %>%
  mutate(
    dyn.estimates_lag = dplyr::lag(dyn.estimates, 1),
    dyn.up_lag        = dplyr::lag(dyn.up, 1),
    dyn.lo_lag        = dplyr::lag(dyn.lo, 1)
  ) %>%
  ungroup() %>%
  filter(!is.na(dyn.estimates_lag))

# --- ANALYSIS ----------------------------------------------------------------
# Original models on merged sample
survobj_coup_orig <- Surv(df_new[["t0"]], df_new[["_t"]], df_new[["c_coup"]])
coup_orig <- coxph(
  survobj_coup_orig ~ legislature + lgdp_1 + growth_1 +
    exportersoffuelsmainlyoil_EL2008 + ethfrac_FIXED +
    mil + cw + age,
  data = df_new, ties = "breslow"
)

survobj_revolt_orig <- Surv(df_new[["t0"]], df_new[["_t"]], df_new[["c_revolt"]])
revolt_orig <- coxph(
  survobj_revolt_orig ~ legislature + lgdp_1 + growth_1 +
    exportersoffuelsmainlyoil_EL2008 + ethfrac_FIXED +
    mil + cw + age,
  data = df_new, ties = "breslow"
)

survobj_natural_orig <- Surv(df_new[["t0"]], df_new[["_t"]], df_new[["c_natural"]])
natural_orig <- coxph(
  survobj_natural_orig ~ legislature + lgdp_1 + growth_1 +
    exportersoffuelsmainlyoil_EL2008 + ethfrac_FIXED +
    communist + mil + cw + age,
  data = df_new, ties = "breslow"
)

# Latent measure models (merged sample)
survobj_coup_latent <- Surv(df_new[["t0"]], df_new[["_t"]], df_new[["c_coup"]])
coups_latent <- coxph(
  survobj_coup_latent ~ dyn.estimates_lag + lgdp_1 + growth_1 +
    exportersoffuelsmainlyoil_EL2008 + ethfrac_FIXED +
    communist + mil + cw + age,
  data = df_new, ties = "breslow"
)

survobj_revolt_latent <- Surv(df_new[["t0"]], df_new[["_t"]], df_new[["c_revolt"]])
revolt_latent <- coxph(
  survobj_revolt_latent ~ dyn.estimates_lag + lgdp_1 + growth_1 +
    exportersoffuelsmainlyoil_EL2008 + ethfrac_FIXED +
    mil + cw + age,
  data = df_new, ties = "breslow"
)

survobj_natural_latent <- Surv(df_new[["t0"]], df_new[["_t"]], df_new[["c_natural"]])
natural_latent <- coxph(
  survobj_natural_latent ~ dyn.estimates_lag + lgdp_1 + growth_1 +
    exportersoffuelsmainlyoil_EL2008 + ethfrac_FIXED +
    communist + mil + cw + age,
  data = df_new, ties = "breslow"
)

# Helper function to compute HRs and SEs for all coefficients
hr_table <- function(model) {
  b  <- coef(model)
  V  <- vcov(model)
  hr <- exp(b)
  se <- sqrt((exp(b))^2 * diag(V))  # delta method: Var(exp(b)) = exp(2b)*Var(b)
  data.frame(Estimate = hr, SE = se)
}

# Calculate results
results_coups_orig   <- hr_table(coup_orig)
results_revolt_orig  <- hr_table(revolt_orig)
results_natural_orig <- hr_table(natural_orig)

results_coups_latent   <- hr_table(coups_latent)
results_revolt_latent  <- hr_table(revolt_latent)
results_natural_latent <- hr_table(natural_latent)

# --- OUTPUT ------------------------------------------------------------------
# LaTeX table: original models
stargazer(
  coup_orig, revolt_orig, natural_orig,
  coef = list(
    results_coups_orig$Estimate,
    results_revolt_orig$Estimate,
    results_natural_orig$Estimate
  ),
  se = list(
    results_coups_orig$SE,
    results_revolt_orig$SE,
    results_natural_orig$SE
  ),
  p.auto = FALSE,
  type = "latex",
  title = "Cox Proportional Hazards Model Results (Original Specification)",
  label = "tab:models_original",
  covariate.labels = c(
    "Legislature", "Log GDP (lag)", "Growth (lag)",
    "Exporters of Fuels Mainly Oil", "Ethnic Fractionalization",
    "Communist", "Military", "Cold War", "Age"
  ),
  dep.var.labels = c("Hazard Ratio (Coups)", "Hazard Ratio (Revolt)", "Hazard Ratio (Natural)"),
  no.space = TRUE,
  digits = 3
)

# LaTeX table: latent measure models
stargazer(
  coups_latent, revolt_latent, natural_latent,
  coef = list(
    results_coups_latent$Estimate,
    results_revolt_latent$Estimate,
    results_natural_latent$Estimate
  ),
  se = list(
    results_coups_latent$SE,
    results_revolt_latent$SE,
    results_natural_latent$SE
  ),
  p.auto = FALSE,
  type = "latex",
  title = "Cox Proportional Hazards Model Results (Latent Measure)",
  label = "tab:models_latent",
  covariate.labels = c(
    "Legislative Power Sharing", "Log GDP (lag)", "Growth (lag)",
    "Exporters of Fuels Mainly Oil", "Ethnic Fractionalization",
    "Communist", "Military", "Cold War", "Age"
  ),
  dep.var.labels = c("Hazard Ratio (Coups)", "Hazard Ratio (Revolt)", "Hazard Ratio (Natural)"),
  no.space = TRUE,
  digits = 3
)
